#DESCRIPTION:

#ARGUMENTS:
# file:  A csv file containing mass, volume, density, and porosity of volcanic deposits. The csv file  
#sep:    The colums of the csv file can be separated with commas (","), semicolons (";") or spaces (""). The default field separator is commas.
#header: The default value is TRUE, this means that each colum has its own header. 
#run:    It is the number of ramdom runs that will be performed to compute the sample stability when the average density is weighted by volume.  The default value is 1000
#probs:  The random runs are divide in quantiles. The probs vector gives the percentages. 
#pq:     Quantile of 95% (0.95) for the random runs.
#pe:     Closest sample size for errors of 1% and 5% (0.01,0.05) 
#breaks: Intervals used to compute the histograms. The default vector spams from 0% to 100% of porosity in steps of 5%.
#minmax: If this argument is TRUE the plotting limits in the X-axis are given  
#        by the minimum and maximum porosity values of the field data. 
#plot:  The default value is TRUE. All the plots are shown in the screen.
#pdf:   The default value is TRUE. All the plots are printed into pdf files.   
#txt:   The default value is TRUE. A txt file is written into the disk with the results.
#Example for running: 1.- source("stats.R")
#                     2.- results<-stats("fielddata.csv")
stats <- function(file,sep=",",run=1000,header=TRUE,probs=seq(0,100,5)/100,pq=0.95,pe=c(0.01,0.05),breaks=seq(0,1,0.05),minmax=TRUE,plot=TRUE,pdf=TRUE,txt=TRUE){
#Read csv file
a<-read.csv(file,sep=sep,header=header)
#Find max and min values of mass, volume, density (rho) and porosity (phi)
m<-range(a[,1],na.rm=TRUE);v<-range(a[,2],na.rm=TRUE);rho<-range(a[,3],na.rm=TRUE);phi<-range(a[,4],na.rm=TRUE)
#Computing the total mass and volume.
tm<-sum(a[,1],na.rm=TRUE);tv<-sum(a[,2],na.rm=TRUE)
#Number of sample in the file
n<-dim(a)[1]
#Standard desviations of density and porosity
srho<-sqrt(var(a[,3],na.rm=TRUE));sphi<-sqrt(var(a[,4],na.rm=TRUE))
#Frequencial averages of mass, volume, density and porosity
fam<-mean(a[,1],na.rm=TRUE);fav<-mean(a[,2],na.rm=TRUE);farho<-mean(a[,3],na.rm=TRUE);faphi<-mean(a[,4],na.rm=TRUE)
#Weighting vector: volume
wv<-a[,2]/sum(a[,2],na.rm=TRUE)
#Density average weigthed by volume
wvarho<-sum(wv*a[,3],na.rm=TRUE)
#Vesicularity average weigthed by volume
wvaphi<-sum(wv*a[,4],na.rm=TRUE)

#Graphical statistics Imman (1952), Folk and Ward (1957) for density
grho<-as.numeric(quantile(a[,3],probs=c(0.05,0.15,0.16,0.25,0.5,0.75,0.84,0.95)))
gmedianrho<-grho[5]
gsigmarho<-(grho[7]-grho[3])/2
gskrho<-(grho[7]+grho[3]-2*grho[5])/(2*(grho[7]-grho[3]))
#####
gmeanrho<-(grho[3]+grho[5]+grho[7])/3
gsigmairho<-(grho[7]-grho[3])/4 + (grho[8]-grho[1])/6.6
gksirho<-(grho[7]+grho[3]-2*grho[5])/(2*(grho[7]-grho[3])) + (grho[8]+grho[1]-2*grho[5])/(2*(grho[8]-grho[1]))
gkgrho<- (grho[8]-grho[1])/(2.44*(grho[6]-grho[4]))

#Graphical statistics Imman (1952), Folk and Ward (1957) for porosity
gphi<-as.numeric(quantile(a[,4],probs=c(0.05,0.15,0.16,0.25,0.5,0.75,0.84,0.95)))
gmedianphi<-gphi[5]
gsigmaphi<-(gphi[7]-gphi[3])/2
gskphi<-(gphi[7]+gphi[3]-2*gphi[5])/(2*(gphi[7]-gphi[3]))
#####
gmeanphi<-(gphi[3]+gphi[5]+gphi[7])/3
gsigmaiphi<-(gphi[7]-gphi[3])/4 + (gphi[8]-gphi[1])/6.6
gksiphi<-(gphi[7]+gphi[3]-2*gphi[5])/(2*(gphi[7]-grho[3])) + (gphi[8]+gphi[1]-2*gphi[5])/(2*(gphi[8]-gphi[1]))
gkgphi<- (gphi[8]-gphi[1])/(2.44*(gphi[6]-gphi[4]))


#To compute porosity histograms. Histogram spacing is based on breaks vector
phih<-NULL
vh<-NULL
labels<-NULL
for (k in 1:(length(breaks)-1)){
id<-which(a[,4]>=breaks[k] & a[,4]<breaks[k+1])
phih<-c(phih,length(id))
vh<-c(vh,sum(wv[id],na.rm=TRUE))
labels<-c(labels,paste(breaks[k],"-",breaks[k+1],sep=""))
if(phi[1]>=breaks[k] & phi[1]<breaks[k+1]) {il<-k } #min value in the histogram plot
if(phi[2]>=breaks[k] & phi[2]<breaks[k+1]) {sl<-k } #max value in the histogram plot
                               }
phih<-phih/n

#To compute density histograms.
rhoh<-NULL
lrho<-NULL
brho<-seq((range(a[,3])[1]%/%100)*100,(range(a[,3])[2]%/%100+1)*100,100)
for (k in 1:(length(brho)-1)){
id<-which(a[,3]>=brho[k] & a[,3]<brho[k+1])
rhoh<-c(rhoh,sum(wv[id]))
lrho<-c(lrho,paste(brho[k],"-",brho[k+1],sep=""))
                              }

#The matrix cointaining all the histogram values is define
H<-rbind(vh,phih)
#If the plot parameter minmax is define, the condition is applied 
if(minmax){H<-H[,il:sl];labels<-labels[il:sl]}
#Random runs of sample for computing the absoulte error of the volumen and density.
V<-matrix(NA,run,n)
RHO<-matrix(NA,run,n)
for (j in 1:run){
ij<-sample(1:n)
vj<-a[ij,2]
rhoj<-a[ij,3]
vij<-NULL
rhoij<-NULL
for (i in 1:n){
wvij<-vj[1:i]/sum(vj[1:i],na.rm=TRUE)
vi<-sum(wvij*rhoj[1:i],na.rm=TRUE);vij<-c(vij,abs(vi-wvarho)/wvarho)
rhoi<-mean(rhoj[1:i],na.rm=TRUE);rhoij<-c(rhoij,abs(rhoi-farho)/farho)
}
V[j,]<-vij
RHO[j,]<-rhoij
}
#Computing quantile matrices for mass, volumen and density random runs.
QV<-matrix(NA,length(probs),n)
QRHO<-matrix(NA,length(probs),n)

for (w in 1:n){
QV[,w]<-quantile(V[,w],probs=probs,na.rm=TRUE)
QRHO[,w]<-quantile(RHO[,w],probs=probs,na.rm=TRUE)
}
#Finding the n values closest to the absolute errors defined in the pe vector.
qi<-which(probs == pq)
nwmrho<-NULL
nwvrho<-NULL
for (s in 1:length(pe)){
dwvrho<-abs(QV[qi,]-pe[s])
nwvrhos<-which(dwvrho == min(dwvrho))
nwvrho<-c(nwvrho,nwvrhos)
}
#Putting together all the n values related with each pe entry
N<-rbind(nwvrho)
#slope, considering the absolute error percentages provided in the pe vector
sswvrho<-NULL
ssrho<-NULL
for (alpha in 1:length(pe)){
sswvalpha<-QV[qi,nwvrho[alpha]]*100/(n-nwvrho[alpha])
sswvrho<-c(sswvrho,sswvalpha)
}
#Prefix name
bn<-basename(file)
fn<-substr(bn,0,nchar(bn)-4)
#Plotting section
#Histograms plot of  volumen and density
plotbc <- function() {
barplot(100*H,beside=TRUE,names.arg=labels,cex.names=0.7,ylab="Abundance (%)",xlab="Porosity (Phi)",legend.text=c("Weighted by volume","Frequency"),main=paste("Abundance Histogram Comparison: Porosity; n=",n),las=2)
                     }
#Ploting of the ramdom runs for the pq quantile
plotre <- function() {
plot(QV[qi,]*100,type="o",cex=0.5,pch=19,col="black",xlab="Sample Size",ylab="2-sigma absolute error (%)",main="Stability curve")
abline(h=5,col=rgb(0,139/255,0),lty=4,lwd=1.5)
abline(h=1,col=rgb(0,139/255,0),lty=4,lwd=1.5)
text(c(1,1),c(6,2),label=c("5%","1%"),col=rgb(0,139/255,0),cex=0.75)
legend(floor(n*0.7),max(QV[qi,])*100*0.95,legend=c(fn,paste("n:",n),paste("runs=",run)),cex=0.9)
                     }
#Cumulative plots and histograms for density and porosity
ploter<- function() {
par(mfrow=c(2,2))
barplot(100*rhoh,main="Density histogram",names.arg=lrho,cex.names=0.48,xlab="Density (kg/m^3)",ylab="Abundance (vol. %)",las=2)
ej<-order(a[,3])
plot(a[ej,3],100*cumsum(wv[ej]),type="o",main="Density cumulative plot",xlab="Density (kg/m^3)",ylab="Cumulative abundance (vol. %)")
abline(h=100*c(0.05,0.16,0.25,0.5,0.75,0.84,0.95),col=rgb(0,139/255,0),lty=4,lwd=0.8)
axis(4,at=100*c(0.05,0.16,0.25,0.5,0.75,0.84,0.95),label=100*c(0.05,0.16,0.25,0.5,0.75,0.84,0.95),col=rgb(0,139/255,0),col.axis= rgb(0,139/255,0),cex.axis=0.75,las=1)
barplot(100*H[1,],main="Porosity histogram",names.arg=labels,cex.names=0.5,xlab="Porosity (%)",ylab="Abundance (vol. %)",las=2)
ej<-order(a[,4])
plot(100*a[ej,4],100*cumsum(wv[ej]),type="o",main="Porosity cumulative plot" ,xlab="Porosity (%)",ylab="Cumulative abundance (vol. %)")
abline(h=100*c(0.05,0.16,0.25,0.5,0.75,0.84,0.95),col=rgb(0,139/255,0),lty=4,lwd=0.8)
#text(c(4,4,4,4,4,4,4),c(7,18,27,52,77,86,97),label=c("5%","16%","25%","50%","75%","84%","95%"),col=rgb(0,139/255,0),cex=0.6)
axis(4,at=100*c(0.05,0.16,0.25,0.5,0.75,0.84,0.95),label=100*c(0.05,0.16,0.25,0.5,0.75,0.84,0.95),col=rgb(0,139/255,0),col.axis= rgb(0,139/255,0),cex.axis=0.75,las=1)

		    } 
#Printing the plots to pdf
if(pdf){pdf(file=paste(fn,"-bc.pdf",sep=""));plotbc();dev.off()
        pdf(file=paste(fn,"-re.pdf",sep=""));plotre();dev.off()
	 pdf(file=paste(fn,"-er.pdf",sep=""));ploter();dev.off()
}
#Bringing the plot into the screen
if(plot){plotbc();x11();plotre();x11();ploter()}
#Defining parameters that will print after running the script.
item<-c("n","minm","maxm","fam","tm","minv","maxv","fav","tv","minrho","maxrho","stdrho","farho","wvarho","minphi","maxphi","stdphi","faphi","wvaphi","nwvrho_1%","nwvrho_5%","sswvrho_%1","sswvrho_%5","gmedianrho","gsigmarho","gskrho","gmeanrho","gsigmairho","gksirho","gkgrho","gmedianphi","gsigmaphi","gskphi","gmeanphi","gsigmaiphi","gksiphi","gkgphi")
value<-c(n,m[1],m[2],fam,tm,v[1],v[2],fav,tv,rho[1],rho[2],srho,farho,wvarho,phi[1],phi[2],sphi,faphi,wvaphi,nwvrho[1],nwvrho[2],sswvrho[1],sswvrho[2],gmedianrho,gsigmarho,gskrho,gmeanrho,gsigmairho,gksirho,gkgrho,gmedianphi,gsigmaphi,gskphi,gmeanphi,gsigmaiphi,gksiphi,gkgphi)
sr<-data.frame(item,value)
if (txt) { write.table(sr,file=paste(fn,"-R.txt",sep=""),col.names=TRUE,row.names=FALSE,quote=FALSE)}
#Printing the parameters.
print(sr)
#All the computed parameters are stored in the metacharacter st
#Defining st as a list
st<-NULL
st<-as.list(st)
#Setting up all the result parameters in st
st$n<-n ; st$minm<-m[1];st$maxm<-m[2];st$fam<-fam;st$tm<-tm; st$minv<-v[1];st$maxv<-v[2];st$fav<-fav;st$tv<-tv;st$minrho<-rho[1];st$maxrho<-rho[2];
st$stdrho<-srho;st$farho<-farho;st$wvarho<-wvarho;st$minphi<-phi[1];st$maxphi<-phi[2];st$stdphi<-sphi;
st$faphi<-faphi;st$wvaphi<-wvaphi;st$nwvrho<-nwvrho;
st$sswvrho<-sswvrho;st$ssrho<-ssrho;st$H<-H;st$wv<-wv;st$phi<-a[,4]
st$gmedianrho<-gmedianrho;st$gsigmarho<-gsigmarho;st$gskrho<-gskrho;st$gmeanrho<-gmeanrho;st$gsigmairho<-gsigmairho;st$gksirho<-gksirho;
st$gkgrho<-gkgrho
st$gmedianphi<-gmedianphi;st$gsigmaphi<-gsigmaphi;st$gskphi<-gskphi;st$gmeanphi<-gmeanphi;st$gsigmaiphi<-gsigmaiphi;st$gksiphi<-gksiphi;
st$gkgphi<-gkgphi

return(st)
}
